"""Note this map task runner is specifically made to handle heavy keys where
The data might be very large at a particular reduce worker. It doesn't write
The output to a dictionary rather produces to file where each key is on a
single line.
Conditions:
1. The key should not contain ':' in it anywhere
2. Any value in the output should not contain any newline character or a comma
"""

from ma.commons.core.maptaskrunner import MapTaskRunner
from ma.commons.core.processrunner import ProcessRunner
from exceptions import *
import ma.const
import ma.log
from .partitioner import Partitioner 

import os
import time
import sys
import pickle
import traceback



class MRMapTaskRunnerHeavyKey(MapTaskRunner):
    """Map task runner for jobs with "heavy" keys.

    Writes each partition's key/value output to its own local file (one key
    per line) instead of keeping everything in an in-memory dictionary on the
    reduce side. Partitions that receive no keys still get an empty file so
    downstream reduce workers always find their expected input.
    """

    # TODO: Remove the threads argument ... no use
    def __init__(self, tip_info, user_module_name, user_class_name):
        """Construct the task runner.

        Args:
            tip_info: task-in-progress descriptor (job id, task id, input data).
            user_module_name: module holding the user's map class.
            user_class_name: name of the user's map class in that module.
        """
        MapTaskRunner.__init__(self, tip_info, user_module_name, user_class_name)
        self.__log = ma.log.get_logger("ma.codes")

    def _outputFilePathFor(self, hash_value):
        """Return the local output file path for one partition hash value."""
        return self.output_dest_dir + os.sep + \
            ma.const.JobsXmlData.get_str_data(
                ma.const.xml_map_output_filename_with_key,
                self.tip_info.job_id, self.tip_info.task_id, str(hash_value))

    def writeOutputToLocalFile(self):
        """Partition self.key_value_dict by key hash and write one local file
        per partition; partitions with no keys get an empty file.

        Returns:
            True on success, False if any output file could not be written.
        """
        # Work out the directory for this job's local map output files.
        self.output_dest_dir = ma.const.JobsXmlData.get_filepath_str_data(
            ma.const.xml_local_output_temp_dir, self.tip_info.job_id)

        self.partitioner = Partitioner(self.tip_info.job_id)

        # Tracks hash values to which no key has been assigned; a set gives
        # O(1) removal (the original list.remove was O(n) per key).
        unused_hashes = set(self.partitioner.returnAllHashVals())

        # Group the map output by partition hash.
        writer_dict = {}
        for key, value in self.key_value_dict.items():
            hash_value = self.partitioner.partition(key)
            if hash_value not in writer_dict:
                writer_dict[hash_value] = {}
                # This hash value has at least one key now.
                unused_hashes.discard(hash_value)
            writer_dict[hash_value][key] = value

        try:
            self.__log.info('Starting to write map %s outputs', self.tip_info.returnTaskID())

            # Write files which have some output values assigned.
            for hash_value in writer_dict:
                output_filepath = self._outputFilePathFor(hash_value)
                # 'with' guarantees the fd is closed even if writing raises.
                with open(output_filepath, "w") as fd:
                    # Write every key on a separate line.
                    MapTaskRunner.write_keylines(fd, writer_dict[hash_value])
                self.__log.debug('Written map %s output: %s', self.tip_info.returnTaskID(), output_filepath)

            # Write empty files for partitions where no keys ended up.
            for hash_value in unused_hashes:
                output_filepath = self._outputFilePathFor(hash_value)
                # Create the file and leave it empty.
                with open(output_filepath, "w"):
                    pass
                self.__log.debug('Written map %s output: %s', self.tip_info.returnTaskID(), output_filepath)

            self.__log.info('Written %d outputs for map %s',
                            len(writer_dict) + len(unused_hashes),
                            self.tip_info.returnTaskID())
            return True
        except IOError as err_msg:
            self.__log.error('Error while writing map output task %s: %s', self.tip_info.returnTaskID(), err_msg)
            return False
            


if __name__ == "__main__":
    """This function is called by the process creator to transfer information
    about the map task to run
    """
    
    try:
        print("Running MRMapTaskRunnerHeavyKey process pid", os.getpid())
        
        if len(sys.argv) == 5:
            user_module_name = sys.argv[1]
            user_class_name = sys.argv[2]
            job_id = sys.argv[3]
            task_id = sys.argv[4]
            #print user_module_name, user_class_name
            
            # get the filename which holds the reduce compute misc. output
            proc_temp_filename = ma.const.JobsXmlData.get_str_data(ma.const.xml_map_proc_temp_filename, job_id, task_id)
            
            # get the pickled list data
            list = ProcessRunner.pull_pickled_data(proc_temp_filename, job_id)
            maptipinfo = list.__getitem__(0)
            #print 'Job ID:', maptipinfo.job_id
            #print 'Task ID:', maptipinfo.task_id
            #print 'Map or reduce:', maptipinfo.map_or_reduce
            #print 'Struct data:', maptipinfo.struct_data
            #print 'Input data:', maptipinfo.input_data
            
            print('MRMapTaskRunnerHeavyKey received all args for', maptipinfo.returnTaskID(), 'process pid', os.getpid())
            
            task_runner = MRMapTaskRunnerHeavyKey(maptipinfo, user_module_name, user_class_name)
            # open files, process, write output
            task_runner.startTask()
        else:
            print('Wrong number of arguments for MRMapTaskRunnerHeavyKey process pid', os.getpid())
            os._exit(-1)

    except Exception as err_msg:
        traceback.print_exception()
        print("MRMapTaskRunnerHeavyKey:__main__ Exception in running map pid", os.getpid(), ":", err_msg)
        os._exit(-1)
